import numpy as np
import pandas as pd
# Report the versions of the libraries this analysis runs on.
for label, module in (('Numpy Version:', np), ('Pandas Version:', pd)):
    print(label, module.__version__)

# Load the billionaire dataset and preview its first rows.
df = pd.read_csv('Forbes_Billionaire.csv')
df.head()
# Number of distinct billionaire names in each industry.
names_per_industry = df.groupby('Industries')['Name'].nunique()

# Preview: the eight industries with the most names, indexed by industry.
names_per_industry.to_frame().sort_values(by='Name', ascending=False).head(8)

# The same ranking with 'Industries' as an ordinary column; reused later on.
df_top8 = (
    names_per_industry
    .to_frame()
    .reset_index()
    .sort_values(by='Name', ascending=False)
    .head(8)
)
df_top8
df_top8.iloc[0:8, 0:1]  # first column only: the industry names
df_top8
## Building a new data frame that holds only the rows of the top 8 industries
## (the industries with the most billionaires), in df_top8's ranking order.
#
# The per-industry subsets are gathered first and concatenated in one call:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies the whole frame on every iteration.  An empty slice of df seeds
# the list so the result keeps df's columns and dtypes even when df_top8 has
# no rows (pd.concat raises on an empty list).
industry_subsets = [
    df[df['Industries'] == industry]  # rows of the original frame for this industry
    for industry in df_top8['Industries']
]
new_df_top8 = pd.concat([df.iloc[0:0], *industry_subsets])
new_df_top8
new_df_top8['Industries'].unique()
# Mean net worth within each selected industry, then across all of them.
for industry in new_df_top8['Industries'].unique():
    in_industry = new_df_top8['Industries'] == industry
    industry_net_worth = new_df_top8.loc[in_industry, 'Net Worth'].mean()
    print(
        'The average net worth of the billionaires of the industry',
        industry,
        'is',
        industry_net_worth,
        'billions.',
    )

overall_net_worth = round(new_df_top8['Net Worth'].mean(), 3)
print(
    'The mean net worth of the billionaires from these industries is:',
    overall_net_worth,
    'billions.',
)
# Mean age within each selected industry, then across all of them.
for industry in new_df_top8['Industries'].unique():
    in_industry = new_df_top8['Industries'] == industry
    industry_age = new_df_top8.loc[in_industry, 'Age'].mean()
    print(
        'The average age of the billionaires of the industry',
        industry,
        'is',
        industry_age,
        'years.',
    )

overall_age = round(new_df_top8['Age'].mean(), 2)
print(
    'The mean age of the billionaires from these industries is:',
    overall_age,
    'years.',
)
df['Industries'].unique()

# Richest billionaire in the Automotive sector.
# The filter + sort is done once and reused, and the name / net worth are read
# by column label rather than by position, so the message cannot silently
# print the wrong column if the CSV's column order differs from what a
# hard-coded index assumes.
automotive = df[df['Industries'] == 'Automotive']
richest_automotive = automotive.sort_values(by='Net Worth', ascending=False).head(1)
richest_automotive
print(
    'The richest billionaires in the Automotive sector is',
    richest_automotive['Name'].iloc[0],
    'with a net worth of',
    richest_automotive['Net Worth'].iloc[0],
    'billions.',
)
# The five countries with the most distinct billionaire names.
names_per_country = df.groupby('Country')['Name'].nunique()
df_top5 = (
    names_per_country
    .to_frame()
    .reset_index()
    .sort_values(by='Name', ascending=False)
    .head(5)
)
df_top5
df_top5.iloc[0:5, 0:1]  # first column only: the country names
# Mean billionaire age for each of the top 5 countries.
#
# One record per country is collected in a plain list and the frame is built
# once at the end: DataFrame.append was removed in pandas 2.0, and appending
# row by row copies the frame on every iteration.  Passing `columns` keeps the
# 'Country' / 'Mean Age' layout even when df_top5 is empty.
mean_age_rows = []
for country in df_top5['Country']:
    mean = df.loc[df['Country'] == country, 'Age'].mean()
    print('The mean age of', country, 'is', mean, 'years.')
    mean_age_rows.append({'Country': country, 'Mean Age': mean})
mean_age_df = pd.DataFrame(mean_age_rows, columns=['Country', 'Mean Age'])
mean_age_df
# Country with the lowest mean age among those in mean_age_df.
youngest = mean_age_df.sort_values(by='Mean Age', ascending=True).head(1)
youngest
youngest_row = youngest.values[0]  # [country, mean age]
print(
    'The country which has the lowest mean age amongst the top 5 countries with the maximum number of billionaires is',
    youngest_row[0],
    'with an average age of',
    round(youngest_row[1], 2),
    'years.',
)
# Richest billionaire from China.
# The net worth is read from the 'Net Worth' column by label.  The previous
# positional lookup used index 2 here while the rest of the script reads net
# worth from index 3, so one of them was printing the wrong column.  The
# filter + sort is also done once instead of once per printed field.
chinese_billionaires = df[df['Country'] == 'China']
richest_chinese = chinese_billionaires.sort_values(by='Net Worth', ascending=False).head(1)
print(
    'The richest Chinese billionaire is',
    richest_chinese['Name'].iloc[0],
    'with a Net Worth of',
    richest_chinese['Net Worth'].iloc[0],
    'billions.',
)
# Industries ranked by the number of distinct Indian billionaire names; top 5.
indian_names_per_industry = (
    df[df['Country'] == 'India'].groupby('Industries')['Name'].nunique()
)
top5_indian_industries = (
    indian_names_per_industry
    .to_frame()
    .reset_index()
    .sort_values(by='Name', ascending=False)
    .head(5)
)
top5_indian_industries
print('The top 5 industries with maximum number of Indian billionaires are the following:')
top5_indian_industries.iloc[0:5, 0:1]  # first column only: the industry names
# The 100 rows with the highest net worth.
ranked_by_net_worth = df.sort_values(by='Net Worth', ascending=False)
top_100 = ranked_by_net_worth.iloc[:100]
top_100

# Names of the Indian billionaires within that top 100.
top_100.loc[top_100['Country'] == 'India', 'Name'].values
# All rows whose country is India.
Indian_billionaires = df.loc[df['Country'] == 'India']
Indian_billionaires

# Indian billionaires younger than 50: their names and their mean net worth.
indians_under_50 = Indian_billionaires.loc[Indian_billionaires['Age'] < 50]
print('The names of the Indian Billionaires under the age of 50 are:')
indians_under_50['Name'].values
print('The average Net Worth of Indian Billionaires below the age of 50 is')
round(indians_under_50['Net Worth'].mean(), 3)
# Donald Trump's row; his net worth is read by column label rather than by a
# hard-coded column position.
trump = df[df['Name'] == 'DONALD TRUMP']
trump
# .iloc[0] raises IndexError if no row has this exact name.
trump_net_worth = trump['Net Worth'].iloc[0]

# Indian billionaires whose industry is Real Estate.
Indian_billionaires_realestate = Indian_billionaires[Indian_billionaires['Industries'] == 'Real Estate']
Indian_billionaires_realestate

# Those of them with a higher net worth than Trump (computed once, then reused).
richer_than_trump = Indian_billionaires_realestate[
    Indian_billionaires_realestate['Net Worth'] > trump_net_worth
]
richer_than_trump

# Report every match instead of assuming there are exactly two: the previous
# version indexed rows 0 and 1 directly, which raised IndexError with fewer
# than two matches and silently dropped any beyond the second.
richer_names = [str(name) for name in richer_than_trump['Name']]
if not richer_names:
    print('No Indian billionaires from the Real Estate Industry are richer than Donald Trump')
else:
    if len(richer_names) == 1:
        listing = richer_names[0]
    else:
        # "A and B" for two names, "A, B and C" for more.
        listing = ', '.join(richer_names[:-1]) + ' and ' + richer_names[-1]
    print('The Indian billionaires from the Real Estate Industry who are richer than Donald Trump are', listing)
# Sum of the 'Net Worth' column over every row of the dataset.
df.loc[:, 'Net Worth'].sum()